In [1]:
import pysal as ps
import numpy as np
%pylab inline
In [2]:
f = ps.open(ps.examples.get_path('usjoin.csv'), 'r')
To determine what is in the file, check the header attribute on the file object:
In [3]:
f.header
Out[3]:
OK, let's pull in the name variable to see what we have:
In [4]:
name = f.by_col('Name')
In [5]:
name
Out[5]:
Now obtain per capita incomes for 1929, which are in the column labeled 1929:
In [6]:
y1929 = f.by_col('1929')
In [7]:
y1929
Out[7]:
And now 2009
In [8]:
y2009 = f.by_col("2009")
In [9]:
y2009
Out[9]:
These are read in as regular Python lists, which are not particularly well suited to efficient data analysis, so let's convert them to numpy arrays:
In [10]:
y2009 = np.array(y2009)
In [11]:
y2009
Out[11]:
Much better. But pulling these in and converting them one column at a time is tedious and error-prone, so we will do all of this in a single list comprehension:
In [12]:
Y = np.array([f.by_col(str(year)) for year in range(1929,2010)]) * 1.0
In [13]:
Y.shape
Out[13]:
In [14]:
Y = Y.transpose()
In [15]:
Y.shape
Out[15]:
In [16]:
years = np.arange(1929,2010)
In [17]:
plot(years,Y[0])
Out[17]:
In [18]:
RY = Y / Y.mean(axis=0)
In [19]:
plot(years,RY[0])
Out[19]:
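A quick sanity check on the names defined above: RY holds each state's income relative to the cross-sectional mean for that year, so averaging over states (axis=0) should give 1.0 in every year.

np.allclose(RY.mean(axis=0), 1.0)  # True: relative incomes average to 1 each year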
In [20]:
name = np.array(name)
In [21]:
np.nonzero(name=='Ohio')
Out[21]:
In [22]:
plot(years, RY[32], label='Ohio')
plot(years, RY[0], label='Alabama')
legend()
Out[22]:
In [23]:
for row in RY:
    plot(years, row)
In [24]:
from scipy.stats import gaussian_kde
In [25]:
density = gaussian_kde(Y[:,0])
In [26]:
Y[:,0]
Out[26]:
In [28]:
minY0 = Y[:,0].min()*.90
maxY0 = Y[:,0].max()*1.10
x = np.linspace(minY0, maxY0, 100)
In [29]:
plot(x,density(x))
Out[29]:
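gaussian_kde picks its bandwidth with Scott's rule by default. To gauge how sensitive the shape is to that choice, the bandwidth rule can be swapped and restored (a sketch; set_bandwidth requires SciPy 0.11 or newer):

density.set_bandwidth(bw_method='silverman')  # try Silverman's rule instead
plot(x, density(x))
density.set_bandwidth(bw_method='scott')      # restore the default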
In [30]:
d2009 = gaussian_kde(Y[:,-1])
In [31]:
minY0 = Y[:,-1].min()*.90
maxY0 = Y[:,-1].max()*1.10
x = np.linspace(minY0, maxY0, 100)
In [32]:
plot(x,d2009(x))
Out[32]:
In [33]:
minR0 = RY.min()
In [34]:
maxR0 = RY.max()
In [35]:
x = np.linspace(minR0, maxR0, 100)
In [36]:
d1929 = gaussian_kde(RY[:,0])
In [37]:
d2009 = gaussian_kde(RY[:,-1])
In [38]:
plot(x, d1929(x))
plot(x, d2009(x))
Out[38]:
In [39]:
plot(x, d1929(x), label='1929')
plot(x, d2009(x), label='2009')
legend()
Out[39]:
In [40]:
for cs in RY.T: # take cross sections
    plot(x, gaussian_kde(cs)(x))
In [41]:
sigma = RY.std(axis=0)
plot(years, sigma)
ylabel('s')
xlabel('year')
title("Sigma-Convergence")
Out[41]:
So the distribution is becoming less dispersed over time.
But what about internal mixing? Do poor (rich) states remain poor (rich), or is there movement within the distribution over time?
In [42]:
c = np.array([['b','a','c'],
              ['c','c','a'],
              ['c','b','c'],
              ['a','a','b'],
              ['a','b','c']])
In [43]:
c
Out[43]:
In [44]:
m = ps.Markov(c)
In [45]:
m.classes
Out[45]:
In [46]:
m.transitions
Out[46]:
In [47]:
m.p
Out[47]:
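m.p is just the transition count matrix normalized row by row. A minimal sketch of the same computation, using the m defined above:

row_totals = m.transitions.sum(axis=1)
m.transitions / row_totals[:, None]  # each row sums to 1.0, matching m.p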
In [48]:
f = ps.open(ps.examples.get_path("usjoin.csv"))
pci = np.array([f.by_col[str(y)] for y in range(1929,2010)])
pci.shape
Out[48]:
Put the series into cross-sectional quintiles (i.e., quintiles computed separately for each year):
In [49]:
q5 = np.array([ps.Quantiles(y).yb for y in pci]).transpose()
In [50]:
q5.shape
Out[50]:
In [51]:
q5[:,0]
Out[51]:
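As a check, the quintiles should split the 48 states into roughly equal groups each year (a quick sketch):

np.bincount(q5[:, 0])  # class sizes in 1929: about 48/5, i.e., 9 or 10 states each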
In [52]:
pci.shape
Out[52]:
In [53]:
pci[0]
Out[53]:
We are looping over the rows of pci, which is ordered T x n (rows are cross-sections; row 0 is the cross-section for period 0).
In [54]:
m5 = ps.Markov(q5)
In [55]:
m5.classes
Out[55]:
In [56]:
m5.transitions
Out[56]:
In [57]:
m5.p
Out[57]:
In [58]:
m5.steady_state
Out[58]:
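The steady state is the left eigenvector of the transition matrix associated with the unit eigenvalue, rescaled to sum to one. A sketch of that computation in numpy (the asarray guards against p being a numpy matrix in older PySAL):

P = np.asarray(m5.p)
vals, vecs = np.linalg.eig(P.T)    # left eigenvectors of P
i = np.argmin(np.abs(vals - 1.0))  # eigenvalue closest to 1
pi = np.real(vecs[:, i])
pi / pi.sum()                      # matches m5.steady_state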
In [59]:
fmpt = ps.ergodic.fmpt(m5.p)
In [60]:
fmpt
Out[60]:
For a state with income in the first quintile, it takes on average 11.5 years for it to first enter the second quintile, 29.6 to get to the third quintile, 53.4 years to enter the fourth, and 103.6 years to reach the richest quintile.
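The diagonal of the FMPT matrix holds the expected recurrence times, which for an ergodic chain are the reciprocals of the steady-state probabilities, giving a quick cross-check:

np.diag(np.asarray(fmpt))                    # expected return time to each quintile
1.0 / np.asarray(m5.steady_state).flatten()  # should agree with that diagonal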
But this approach assumes that a state's movement within the income distribution is independent of the movements and positions of its neighbors. Does spatial context matter?
Read in a GAL file to construct our spatial weights matrix W:
In [61]:
w = ps.open(ps.examples.get_path("states48.gal")).read()
w.n
w.transform = 'R'
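Row-standardization ('R') rescales each state's weights to sum to one; a quick check over the weights dictionary:

wsums = [sum(wts) for wts in w.weights.values()]
min(wsums), max(wsums)  # both ~1.0 after row-standardization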
In [62]:
mits = [ps.Moran(cs, w) for cs in RY.T]
In [63]:
res = np.array([(m.I, m.EI, m.p_sim, m.z_sim) for m in mits])
In [64]:
plot(years, res[:,0], label='I')
plot(years, res[:,1], label='E[I]')
title("Moran's I")
legend()
Out[64]:
In [65]:
plot(years, res[:,-1])
ylim(0,7.0)
title('z-values, I')
Out[65]:
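A dashed reference line at the conventional two-sided 5% critical value (z ≈ 1.96) makes the plot easier to read (a sketch re-drawing the same series):

plot(years, res[:,-1])
axhline(1.96, linestyle='--', color='k')  # ~5% critical value
title('z-values, I')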
In [66]:
pci.shape
Out[66]:
In [67]:
pci = pci.T
In [68]:
pci.shape
Out[68]:
In [69]:
rpci = pci / pci.mean(axis=0)
In [70]:
rpci[:,0]
Out[70]:
In [71]:
rpci[:,0].mean()
Out[71]:
In [72]:
sm = ps.Spatial_Markov(rpci, w, fixed=True, k=5)
In [73]:
sm.p
Out[73]:
In [74]:
for p in sm.P:
    print(p)
In [75]:
sm.S
Out[75]:
In [76]:
for fm in sm.F:  # fm: one conditional FMPT matrix (renamed to avoid shadowing the file object f)
    print(fm)
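sm.F stacks one first-mean-passage-time matrix per spatial-lag class. Comparing, say, the expected time for a poorest-quintile state to first reach the richest quintile across neighbor contexts is one way to see whether spatial context matters (a sketch):

[fm[0, 4] for fm in sm.F]  # passage time from class 0 to class 4, by neighbor class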
In [77]:
lm = ps.LISA_Markov(pci,w)
In [78]:
lm.classes
Out[78]:
In [79]:
lm.transitions
Out[79]:
In [80]:
np.set_printoptions(3, suppress=True)
In [81]:
lm.transitions
Out[81]:
In [82]:
lm.p
Out[82]:
In [83]:
lm.steady_state
Out[83]:
In [84]:
ps.ergodic.fmpt(lm.p)
Out[84]:
In [85]:
lm.transitions
Out[85]:
In [86]:
lm.expected_t
Out[86]:
In [87]:
lm.chi_2
Out[87]:
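lm.chi_2 summarizes a chi-squared test of whether the LISA transitions are independent of the moves of the spatial lag; in PySAL it is documented as a (test statistic, p-value, degrees of freedom) tuple:

chi2_stat, pvalue, dof = lm.chi_2
pvalue  # a small p-value rejects independence: neighbors' moves matter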